Py.Cafe

ThomasD21M/

Week 4- NEA Data Visualizations

A Dash app for visualizing NEA data

Docs | Pricing
  • app.py
  • requirements.txt
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
from dash import Dash, dcc, html, Input, Output
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Create the Dash application object that the layout and callbacks attach to.
app = Dash(__name__)

# Location of the NEA dataset (a CSV in the Plotly Figure-Friday repository).
file_path = "https://raw.githubusercontent.com/plotly/Figure-Friday/main/2025/week-4/Post45_NEAData_Final.csv"

# Read the CSV; malformed rows are skipped instead of aborting the load.
data = pd.read_csv(filepath_or_buffer=file_path, on_bad_lines='skip')

# Echo the column names at startup.
print(data.columns)

# Degree columns in the CSV (keys) and the labels shown in the charts (values).
certification_columns = dict(
    ba="Bachelor's Degree",
    ba2="Additional Bachelor's Degree",
    ma="Master's Degree",
    ma2="Additional Master's Degree",
    phd="PhD",
    mfa="MFA",
    mfa2="Additional MFA",
)

# Reshape the degree columns to long form: one row per source row and degree
# column, keeping only rows where an institution is recorded.
certifications = (
    data
    .melt(
        id_vars=['gender'],
        value_vars=[*certification_columns],
        var_name='certification_type',
        value_name='institution',
    )
    .dropna(subset=['institution'])
    # Replace the short column codes with their readable labels.
    .assign(
        certification_type=lambda frame: frame['certification_type'].map(certification_columns)
    )
)

# Start the choropleth frame from a copy of the raw data with one extra column:
# 'grant_year' holds 'other_nea_grant' where it is present and falls back to
# 'nea_grant_year' where it is missing.
choropleth_data = data.assign(
    grant_year=data['other_nea_grant'].fillna(data['nea_grant_year'])
)

# Clean grant_year column by handling cases like "1979, 1989" or "1987;1979"
def clean_grant_year(value):
    """Return the first year contained in *value* as an int, or None.

    Args:
        value: A raw cell from the grant-year columns. It may be a string
            holding one year or several years separated by ',' or ';',
            a number, or a missing value such as None or NaN.

    Returns:
        The first listed year as an int, or None when it cannot be parsed.
        This function never raises for unparsable input.
    """
    if isinstance(value, str):
        # Treat ';' like ',' so mixed separators are handled by a single
        # split, then keep only the first listed entry.
        value = value.replace(';', ',').split(',', 1)[0].strip()
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None and other non-numeric objects.
        # ValueError: non-numeric text, empty strings, NaN.
        # OverflowError: infinities.
        return None

# Reduce every raw entry to a single year, discard rows where no year could be
# parsed, then count the remaining rows per (state, year) pair.
choropleth_data = (
    choropleth_data
    .assign(grant_year=lambda frame: frame['grant_year'].apply(clean_grant_year))
    .dropna(subset=['grant_year'])
    .groupby(['us_state', 'grant_year'])
    .size()
    .reset_index(name='count')
)

# Yearly grant counts split by gender. The year is 'other_nea_grant' when
# present, otherwise 'nea_grant_year', reduced to a single year per row. Rows
# lacking a usable year or a gender are excluded before counting.
gender_trends = (
    data
    .assign(
        grant_year=data['other_nea_grant']
        .fillna(data['nea_grant_year'])
        .apply(clean_grant_year)
    )
    .dropna(subset=['grant_year', 'gender'])
    .groupby(['grant_year', 'gender'])
    .size()
    .reset_index(name='count')
)

# Pareto inputs, step 1: number of certification rows per (institution, gender).
gender_by_university = (
    certifications
    .groupby(['institution', 'gender'])
    .size()
    .reset_index(name='count')
)

# Step 2: collapse to one total per institution, order from largest to
# smallest, and attach the running share of the grand total in percent.
university_totals = (
    gender_by_university
    .groupby('institution')['count']
    .sum()
    .reset_index()
    .sort_values(by='count', ascending=False)
    .assign(
        cumulative_percentage=lambda totals: totals['count'].cumsum() / totals['count'].sum() * 100
    )
)

# Layout of the Dash app
def _graph_section(heading, graph_id):
    """Return a Div with a white H2 *heading* above an empty Graph *graph_id*."""
    return html.Div(
        [
            html.H2(heading, style={'color': 'white'}),
            dcc.Graph(id=graph_id),
        ],
        style={'margin': '20px'},
    )


# Page structure: a centred title followed by three tabs. Each tab stacks one
# or two graph sections whose figures are supplied by the callbacks below.
app.layout = html.Div(
    [
        html.H1(
            'NEA Data Visualizations',
            style={'textAlign': 'center', 'color': 'white'},
        ),
        dcc.Tabs([
            dcc.Tab(label='Certifications', children=[
                _graph_section('Certifications by Gender', 'certifications-by-gender'),
                _graph_section('Gender by University - Pareto Chart', 'pareto-gender-by-university'),
            ]),
            dcc.Tab(label='Grants', children=[
                _graph_section('NEA Grants Heatmap by State', 'heatmap-grants-by-state'),
                _graph_section('NEA Grants Choropleth Map', 'choropleth-map'),
            ]),
            dcc.Tab(label='Trends', children=[
                _graph_section('Male vs Female Grants Over the Years', 'gender-trends'),
            ]),
        ]),
    ],
    style={'backgroundColor': '#1e1e1e', 'padding': '20px'},
)

# Callbacks to generate the graphs
@app.callback(
    Output(component_id='certifications-by-gender', component_property='figure'),
    Input(component_id='certifications-by-gender', component_property='id'),
)
def update_certifications_by_gender(_):
    """Build the grouped histogram of certification types coloured by gender.

    The argument is the graph's own ``id`` property and is ignored; the figure
    is built from the module-level ``certifications`` frame.

    Returns:
        The Plotly figure with the dark colour scheme applied.
    """
    dark_theme = {
        'plot_bgcolor': '#1e1e1e',
        'paper_bgcolor': '#1e1e1e',
        'font_color': 'white',
    }
    histogram = px.histogram(
        data_frame=certifications,
        x='certification_type',
        color='gender',
        barmode='group',
        title='Count of Certifications by Gender',
    )
    # update_layout returns the same figure, so it can be returned directly.
    return histogram.update_layout(**dark_theme)


@app.callback(
    Output(component_id='pareto-gender-by-university', component_property='figure'),
    Input(component_id='pareto-gender-by-university', component_property='id'),
)
def update_pareto_gender_by_university(_):
    """Build the Pareto chart of certification counts per institution.

    The argument is the graph's own ``id`` property and is ignored; the figure
    is built from the module-level ``university_totals`` frame.

    Returns:
        A figure with count bars on the left axis and the cumulative
        percentage line on a secondary right-hand axis.
    """
    institutions = university_totals['institution']
    pareto = go.Figure()

    # Bars: total count per institution.
    pareto.add_trace(
        go.Bar(
            x=institutions,
            y=university_totals['count'],
            name='Count',
        )
    )

    # Line: running share of the overall total, drawn against the second y axis.
    pareto.add_trace(
        go.Scatter(
            x=institutions,
            y=university_totals['cumulative_percentage'],
            mode='lines+markers',
            name='Cumulative Percentage',
            yaxis='y2',
        )
    )

    # Axis titles, the overlaid right-hand axis, and the dark colour scheme.
    pareto.update_layout(
        title='Gender by University - Pareto Chart',
        yaxis={'title': 'Count'},
        yaxis2={
            'title': 'Cumulative Percentage',
            'overlaying': 'y',
            'side': 'right',
        },
        plot_bgcolor='#1e1e1e',
        paper_bgcolor='#1e1e1e',
        font={'color': 'white'},
    )
    return pareto

@app.callback(
    Output(component_id='heatmap-grants-by-state', component_property='figure'),
    Input(component_id='heatmap-grants-by-state', component_property='id'),
)
def update_heatmap_grants_by_state(_):
    """Build the state-by-year density heatmap of grant counts.

    The argument is the graph's own ``id`` property and is ignored. Counts are
    computed on each call from the module-level ``data`` frame, grouped by
    'us_state' and 'nea_grant_year'.

    Returns:
        The Plotly figure with the dark colour scheme applied.
    """
    dark_theme = {
        'plot_bgcolor': '#1e1e1e',
        'paper_bgcolor': '#1e1e1e',
        'font_color': 'white',
    }
    # Number of rows for every (state, grant year) combination.
    per_state_year = data.groupby(['us_state', 'nea_grant_year'])
    state_grants = per_state_year.size().reset_index(name='count')

    heatmap = px.density_heatmap(
        data_frame=state_grants,
        x='us_state',
        y='nea_grant_year',
        z='count',
        color_continuous_scale='Viridis',
        title='Grants Heatmap by State',
    )
    return heatmap.update_layout(**dark_theme)

@app.callback(
    Output('choropleth-map', 'figure'),
    Input('choropleth-map', 'id')
)
def update_choropleth_map(_):
    """Build the animated US choropleth of grant counts per state and year.

    The argument is the graph's own ``id`` property and is ignored; the figure
    is built from the module-level ``choropleth_data`` frame, which has
    'us_state', 'grant_year' and 'count' columns.

    Returns:
        The Plotly figure, with one animation frame per grant year in
        chronological order and the dark colour scheme applied.
    """
    # Plotly Express emits animation frames in order of first appearance in
    # the data. The aggregated frame is ordered by state first, so sort by
    # year to make the slider run chronologically. Casting to int keeps the
    # frame labels as "1979" instead of "1979.0"; the years may be stored as
    # floats after missing values were handled upstream.
    frames = (
        choropleth_data
        .sort_values(['grant_year', 'us_state'])
        .assign(grant_year=lambda df: df['grant_year'].astype(int))
    )

    # Pin the colour scale across all frames so colours stay comparable from
    # year to year instead of depending on a single frame's values.
    max_count = int(frames['count'].max()) if not frames.empty else 1

    fig = px.choropleth(
        frames,
        locations='us_state',
        locationmode='USA-states',
        color='count',
        scope='usa',
        animation_frame='grant_year',
        range_color=(0, max_count),
        title='NEA Grants by State Over Time',
        labels={'count': 'Number of Grants'}
    )
    fig.update_layout(
        geo=dict(bgcolor='rgba(0,0,0,0)'),
        plot_bgcolor='#1e1e1e',
        paper_bgcolor='#1e1e1e',
        font_color='white'
    )
    return fig

@app.callback(
    Output(component_id='gender-trends', component_property='figure'),
    Input(component_id='gender-trends', component_property='id'),
)
def update_gender_trends(_):
    """Build the line chart of yearly grant counts, one line per gender.

    The argument is the graph's own ``id`` property and is ignored; the figure
    is built from the module-level ``gender_trends`` frame.

    Returns:
        The Plotly figure with the dark colour scheme applied.
    """
    dark_theme = {
        'plot_bgcolor': '#1e1e1e',
        'paper_bgcolor': '#1e1e1e',
        'font_color': 'white',
    }
    trend_lines = px.line(
        data_frame=gender_trends,
        x='grant_year',
        y='count',
        color='gender',
        title='Male vs Female Grants Over the Years',
    )
    return trend_lines.update_layout(**dark_theme)

# Run the app
# Start the development server only when this file is executed directly.
# `app.run` replaces `app.run_server`, which is obsolete and raises in Dash 3.
if __name__ == '__main__':
    app.run(debug=True)